{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# MegaClassifier Categories\n",
    "\n",
    "This notebook determines the classes for MegaClassifier to train on. At a high level, this notebook does the following:\n",
    "\n",
    "1. Query MegaDB to figure out how many labeled images there are for each dataset-specific class.\n",
    "2. Build taxonomy tree from the taxonomy CSV, which maps between dataset-specific classes and the taxonomy hierarchy.\n",
    "3. Using MegaDB query results, determine the number of images at each level in the taxonomy hierachy.\n",
    "4. Find the bottom-most \"leaves\" in the taxonomy hiearachy that have the required minimum threshold of images. These leaves are the MegaClassifier categories.\n",
    "5. Save these categories to `label_spec.json` to be used in the classification pipeline.\n",
    "6. (Optional) Graphically plot the categories within the taxonomy hierarchy."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Imports and constants"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%load_ext autoreload\n",
    "%autoreload 2\n",
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import json\n",
    "import os\n",
    "from pprint import pprint\n",
    "from typing import Any, Dict, Iterable, Mapping, Optional, Sequence, Tuple\n",
    "\n",
    "import graphviz as gv\n",
    "import networkx as nx\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "import pandas as pd\n",
    "from tqdm.auto import tqdm\n",
    "\n",
    "from taxonomy_mapping.taxonomy_graph import TaxonNode, build_taxonomy_graph, dag_to_tree\n",
    "from data_management.megadb import megadb_utils"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Typically independent of MegaClassifier version\n",
    "TAXONOMY_CSV_PATH = '../../camera-traps-private/camera_trap_taxonomy_mapping.csv'\n",
    "IMAGES_PER_LEAF_THRESHOLD = 2000\n",
    "\n",
    "# Adjusted per MegaClassifier iteration\n",
    "classifier_name = 'megaclassifier-v0.2'\n",
    "megaclassifier_base = os.path.join('/datadrive/classifier-training/',classifier_name)\n",
    "num_images_csv_path = os.path.join(megaclassifier_base,'num_images_per_dataset.csv')\n",
    "labeled_image_counts_dir = os.path.join(megaclassifier_base,classifier_name + '-labeled_image_counts')\n",
    "label_spec_output_file = os.path.join(megaclassifier_base,classifier_name + '-label_spec.json')\n",
    "os.makedirs(megaclassifier_base,exist_ok=True)\n",
    "os.makedirs(labeled_image_counts_dir,exist_ok=True)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Load taxonomy CSV and MegaDB"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df = pd.read_csv(TAXONOMY_CSV_PATH)\n",
    "megadb = megadb_utils.MegadbUtils()\n",
    "ds_table = megadb.get_datasets_table()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Count labeled images"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_num_images_per_dataset(datasets: Iterable[str],\n",
    "                               check_existing: Optional[str] = None\n",
    "                              ) -> pd.Series:\n",
    "    \"\"\"Count the total number of images in a dataset based on MegaDB.\n",
    "\n",
    "    If the throughput on the sequences container is set to 10,000 RU/s,\n",
    "    this query should be fairly fast. No more than ~20 minutes for all\n",
    "    ~40 datasets in MegaDB.\n",
    "\n",
    "    Args:\n",
    "        datasets: list of str, names of datasets for which to check the\n",
    "            number of images\n",
    "        check_existing: optional str, path to CSV of existing counts\n",
    "            with exactly two columns ['dataset', 'num_images']\n",
    "\n",
    "    Returns: pd.Series, indexed by dataset name, value is count\n",
    "    \"\"\"\n",
    "    if check_existing is None:\n",
    "        counter = pd.Series(index=pd.Index([], name='dataset'),\n",
    "                            name='num_images', dtype=int)\n",
    "    else:\n",
    "        counter = pd.read_csv(check_existing, squeeze=True, index_col='dataset')\n",
    "\n",
    "    query = 'SELECT VALUE SUM(ARRAY_LENGTH(seq.images)) FROM seq'\n",
    "    # Equivalent query: 'SELECT VALUE COUNT(1) FROM seq JOIN img IN seq.images'\n",
    "\n",
    "    for ds in tqdm(sorted(datasets)):\n",
    "        if ds in counter.index:\n",
    "            tqdm.write(f'{ds} already in existing CSV. Skipping.')\n",
    "            continue\n",
    "\n",
    "        # Sometimes the query will return multiple results (perhaps due to\n",
    "        # CosmosDB paging?), so we just sum the counts.\n",
    "        results = list(megadb.query_sequences_table(query=query, partition_key=ds))\n",
    "        if len(results) > 1:\n",
    "            tqdm.write(f'Got more than one result for {ds}')\n",
    "        counter[ds] = sum(results)\n",
    "        tqdm.write(f'Dataset: {ds}, Count: {counter[ds]}')\n",
    "\n",
    "    counter.sort_index(inplace=True)\n",
    "    return counter\n",
    "\n",
    "num_images_per_dataset = get_num_images_per_dataset(\n",
    "    datasets=ds_table.keys(), check_existing=None)\n",
    "num_images_per_dataset.to_csv(num_images_csv_path, index=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def count_labeled_images(datasets: Iterable[str],\n",
    "                         save_dir: str,\n",
    "                         num_images_csv_path: str                         \n",
    "                        ) -> None:\n",
    "    \"\"\"Count the number of labeled images of each class in MegaDB.\n",
    "\n",
    "    An image is counted if the following criteria are all met:\n",
    "    1) Either the sequence is labeled (seq.class) or the image is labeled (img.class).\n",
    "    2) If the image is labeled, it has exactly 1 class.\n",
    "    3) Otherwise, if the sequence is labeled, the sequence has exactly 1 class.\n",
    "\n",
    "    If the throughput on the sequences container is set to 10,000 RU/s,\n",
    "    this query should take ~1 hour for all ~40 datasets in MegaDB.\n",
    "\n",
    "    Args:\n",
    "        datasets: list of str, names of datasets for which to count the\n",
    "            number of labeled images\n",
    "        save_dir: str, path to folder to save one output CSV per dataset,\n",
    "            each CSV has exactly two columns ['class', 'count']\n",
    "        num_images_csv_path: str, path to CSV with the number of total images per\n",
    "            dataset, CSV should have exactly two columns ['dataset', 'count']\n",
    "    \"\"\"\n",
    "    if not os.path.exists(save_dir):\n",
    "        os.makedirs(save_dir)\n",
    "        print('Created query cache dir at:', save_dir)\n",
    "\n",
    "    num_images_per_dataset = pd.read_csv(\n",
    "        num_images_csv_path, squeeze=True, index_col='dataset')\n",
    "    for ds in datasets:\n",
    "        assert ds in num_images_per_dataset.index\n",
    "\n",
    "    query = '''\n",
    "    SELECT\n",
    "        VALUE [[seq.class, img.class], COUNT(1)]\n",
    "    FROM sequences seq JOIN img IN seq.images\n",
    "    WHERE\n",
    "        (ARRAY_LENGTH(img.class) = 1)\n",
    "        OR\n",
    "        (NOT ISDEFINED(img.class) AND ARRAY_LENGTH(seq.class) = 1)\n",
    "    GROUP BY [seq.class, img.class]\n",
    "    '''\n",
    "\n",
    "    for ds in tqdm(sorted(datasets)):\n",
    "        save_path = os.path.join(save_dir, f'{ds}.csv')\n",
    "        if os.path.exists(save_path):\n",
    "            tqdm.write(f'Saved class counts for {ds} already exist. Skipping.')\n",
    "            continue\n",
    "\n",
    "        tqdm.write(f'Querying {ds}')\n",
    "        results = list(megadb.query_sequences_table(query=query, partition_key=ds))\n",
    "\n",
    "        counter = pd.Series(index=pd.Index([], name='class'), name='count', dtype=int)\n",
    "        for combined_classes, count in results:\n",
    "            if len(combined_classes) > 1:\n",
    "                tqdm.write('- Has both seq and img class. Using img class.')\n",
    "                tqdm.write(f'- {combined_classes}')\n",
    "                img_classes = combined_classes[1]\n",
    "            else:\n",
    "                img_classes = combined_classes[0]\n",
    "            assert len(img_classes) == 1\n",
    "            img_class = img_classes[0]\n",
    "\n",
    "            if img_class not in counter:\n",
    "                counter[img_class] = 0\n",
    "            counter[img_class] += count\n",
    "\n",
    "        num_labeled_images = counter.sum()\n",
    "        num_images = num_images_per_dataset[ds]\n",
    "        assert num_labeled_images <= num_images\n",
    "        tqdm.write(f'- num labeled images: {num_labeled_images}, num_images: {num_images}')\n",
    "\n",
    "        counter.sort_index(inplace=True)  # sort counter by class\n",
    "        counter.to_csv(save_path, index=True)\n",
    "\n",
    "count_labeled_images(datasets=ds_table.keys(),\n",
    "                     save_dir=labeled_image_counts_dir,\n",
    "                     num_images_csv_path=num_images_csv_path)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "ds_label_counts = {}  # (dataset, dataset_class) => count\n",
    "for ds in sorted(ds_table.keys()):\n",
    "    sr = pd.read_csv(f'labeled_image_counts/{ds}.csv', squeeze=True, index_col='class')\n",
    "    sr.index = sr.index.map(lambda x: (ds, x))\n",
    "    ds_label_counts.update(sr.to_dict())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Build taxonomy tree\n",
    "\n",
    "1. Build taxonomy DAG.\n",
    "2. Remove humans. Leave human detection to MegaDetector, instead of MegaClassifier.\n",
    "3. Convert DAG to tree. For the purposes of MegaClassifier, we require that there be exactly 1 true taxonomy hierarchy."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "graph, taxon_to_node, label_to_node = build_taxonomy_graph(df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "human_node = taxon_to_node[('species', 'homo sapiens')]\n",
    "for n in nx.descendants(graph, human_node):\n",
    "    graph.remove_node(n)\n",
    "graph.remove_node(human_node)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "tree = dag_to_tree(graph, taxon_to_node)\n",
    "num_connected_components = nx.number_weakly_connected_components(tree)\n",
    "\n",
    "root_nodes = []\n",
    "for component in nx.weakly_connected_components(tree):\n",
    "    # Each component is a set of nodes\n",
    "    subgraph = tree.subgraph(component)\n",
    "    assert nx.is_tree(subgraph)\n",
    "    root_nodes.append([n for n, d in subgraph.in_degree() if d == 0][0])\n",
    "assert len(root_nodes) == num_connected_components\n",
    "\n",
    "print('Total number of nodes:', len(tree.nodes))\n",
    "print('Number of disconnected components:', num_connected_components)\n",
    "pprint(root_nodes)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Populate image counts"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_total_img_count(node: TaxonNode) -> int:\n",
    "    \"\"\"Recursively calculates the total number of images in the subtree\n",
    "    rooted at the given node.\n",
    "    \"\"\"\n",
    "    if hasattr(node, 'total_img_count'):\n",
    "        return node.total_img_count\n",
    "    elif len(node.children) == 0:\n",
    "        node.total_img_count = node.img_count\n",
    "        return node.total_img_count\n",
    "    else:\n",
    "        children_img_count = sum(get_total_img_count(c) for c in node.children)\n",
    "        node.total_img_count = node.img_count + children_img_count\n",
    "        return node.total_img_count\n",
    "\n",
    "\n",
    "def populate_image_counts(graph: nx.DiGraph,\n",
    "                          ds_label_counts: Mapping[Tuple[str, str], int],\n",
    "                          label_to_node: Mapping[Tuple[str, str], TaxonNode]) -> None:\n",
    "    \"\"\"Adds 2 properties to every node in graph:\n",
    "    - img_count: int, number of images exactly for this taxon node\n",
    "    - total_img_count: int, number of images in the subtree rooted at this node\n",
    "    \"\"\"\n",
    "    for node in graph.nodes:\n",
    "        node.img_count = 0\n",
    "\n",
    "    labels_not_found = []\n",
    "    for label, count in ds_label_counts.items():\n",
    "        if label not in label_to_node:\n",
    "            labels_not_found.append(label)\n",
    "            continue\n",
    "        node = label_to_node[label]\n",
    "        if node not in graph.nodes:\n",
    "            print(f'Node {node} not in graph.nodes')\n",
    "            continue\n",
    "        node.img_count += count\n",
    "\n",
    "    print('labels not found:')\n",
    "    pprint(labels_not_found)\n",
    "\n",
    "    for subgraph in nx.weakly_connected_components(graph):\n",
    "        # Each subgraph is a set of (taxon_level, taxon_name)\n",
    "\n",
    "        # Get root node\n",
    "        subgraph = graph.subgraph(subgraph)\n",
    "        root_nodes = [n for n, d in subgraph.in_degree() if d==0]\n",
    "        assert len(root_nodes) == 1\n",
    "        root_node = root_nodes[0]\n",
    "\n",
    "        print(f'Graph rooted at {root_node} has {len(subgraph.nodes)} nodes')\n",
    "        print('    Total image count:', get_total_img_count(root_node))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "populate_image_counts(tree, ds_label_counts, label_to_node)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Find leaf nodes containing at least a certain threshold of images\n",
    "\n",
    "Top-down tree search. We define a \"leaf\" as a tuple of nodes such that:\n",
    "\n",
    "1. All nodes in the leaf have the same parent.\n",
    "2. The sum of the `total_img_count` properties on the nodes exceeds the threshold.\n",
    "3. None of the children of the nodes has a `total_img_count` greather than the threshold."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_leaf_nodes(root_nodes: Iterable[TaxonNode], threshold: int\n",
    "                  ) -> Dict[Tuple[TaxonNode, ...], int]:\n",
    "    \"\"\"Given a list of nodes representing the root of trees, returns a dict\n",
    "    mapping each \"leaf\" to total image count belonging to that leaf.\n",
    "    \"\"\"\n",
    "    leaf: Tuple[TaxonNode, ...]  # \n",
    "    all_leaves = set()\n",
    "    leaves_to_count: Dict[Tuple[TaxonNode, ...], int] = {}  # tuple of TaxonNode => int\n",
    "\n",
    "    candidate_nodes = list(root_nodes)  # make a shallow copy\n",
    "    while len(candidate_nodes) > 0:\n",
    "        node = candidate_nodes.pop()\n",
    "        if node.total_img_count < threshold:\n",
    "            continue\n",
    "\n",
    "        children = node.children\n",
    "        if len(children) == 0 or all(c.total_img_count < threshold for c in children):\n",
    "            assert node not in all_leaves\n",
    "            leaves_to_count[(node,)] = node.total_img_count\n",
    "            all_leaves.add(node)\n",
    "            continue\n",
    "\n",
    "        # Pop off any children that exceed the threshold\n",
    "        remaining_children = []\n",
    "        for c in children:\n",
    "            if c.total_img_count >= threshold:\n",
    "                candidate_nodes.append(c)\n",
    "            else:\n",
    "                remaining_children.append(c)\n",
    "\n",
    "        summed_count = sum(c.total_img_count for c in remaining_children)\n",
    "        if summed_count >= threshold:\n",
    "            assert all(c not in all_leaves for c in remaining_children)\n",
    "            leaf = tuple(remaining_children)\n",
    "            leaves_to_count[leaf] = summed_count\n",
    "            all_leaves.update(remaining_children)\n",
    "\n",
    "    return leaves_to_count\n",
    "\n",
    "\n",
    "def leaf_to_name(leaf: Sequence[TaxonNode]) -> str:\n",
    "    if len(leaf) == 1:\n",
    "        node = leaf[0]\n",
    "        leaf_name = f'{node.level}: {node.name}'\n",
    "    else:\n",
    "        parent = leaf[0].parents[0]\n",
    "        assert TaxonNode.lowest_common_ancestor(leaf) is parent\n",
    "        leaf_name = f'{parent.level}: {parent.name} (other)'\n",
    "    return leaf_name"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "leaves_to_count = get_leaf_nodes(root_nodes, threshold=IMAGES_PER_LEAF_THRESHOLD)\n",
    "num_leaves = len(leaves_to_count)\n",
    "print('Total number of leaves:', num_leaves)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "plt.hist(list(leaves_to_count.values()), bins=np.arange(0, 40001, 1000).tolist() + [1e8])\n",
    "plt.xlim(0, 41000)\n",
    "plt.ylabel('num leaves')\n",
    "plt.xlabel('num images in leaf')\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Sort leaves by count\n",
    "display({\n",
    "    leaf_to_name(leaf): leaves_to_count[leaf]\n",
    "    for leaf in sorted(leaves_to_count.keys(), key=leaves_to_count.__getitem__, reverse=True)\n",
    "})"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Write out to label spec JSON"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def leaves_to_label_spec(leaves: Iterable[Sequence[TaxonNode]],\n",
    "                         max_count: Optional[int] = None) -> Dict[str, Any]:\n",
    "    \"\"\"Creates a classification label specification from a list of leaves.\"\"\"\n",
    "    label_spec = {}\n",
    "    for leaf in leaves:\n",
    "        class_name = leaf_to_name(leaf)\n",
    "        assert class_name not in label_spec\n",
    "\n",
    "        taxa_list = []\n",
    "        for node in leaf:\n",
    "            taxa_list.append({\n",
    "                'level': node.level,\n",
    "                'name': node.name\n",
    "            })\n",
    "\n",
    "        taxa_dict: Dict[str, Any] = {'taxa': taxa_list}\n",
    "        if max_count is not None:\n",
    "            taxa_dict['max_count'] = max_count\n",
    "        label_spec[class_name] = taxa_dict\n",
    "\n",
    "    # Sort label_spec by class_name\n",
    "    label_spec = {\n",
    "        class_name: label_spec[class_name]\n",
    "        for class_name in sorted(label_spec.keys())\n",
    "    }\n",
    "    return label_spec"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Target dataset size: 1.4M images (roughly the same size as ImageNet)\n",
    "#\n",
    "# We assume that we get one good crop every three images.\n",
    "#\n",
    "# Even so, this will result in a smaller dataset, because not all classes even have\n",
    "# `crops_per_class` images to begin with.\n",
    "\n",
    "crops_per_class = 1.4e6 / num_leaves\n",
    "label_spec = leaves_to_label_spec(leaves_to_count.keys(), max_count=int(crops_per_class * 3))\n",
    "with open(label_spec_output_file, 'w') as f:\n",
    "    json.dump(label_spec, f, indent=1)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Plot classes graph"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def draw_leaves_graph(tree: nx.DiGraph,\n",
    "                      leaves_to_count: Mapping[Tuple[TaxonNode, ...], int]\n",
    "                     ) -> gv.Digraph:\n",
    "    \"\"\"\n",
    "    See:\n",
    "    \n",
    "      http://www.graphviz.org/doc/info/attrs.html\n",
    "    \n",
    "    ...for a description of the different graphviz attributes.\n",
    "    \"\"\"\n",
    "    # create a nx.DiGraph containing only the leaves and their ancestors\n",
    "    subtree_nodes = set()\n",
    "    for leaf in leaves_to_count:\n",
    "        for node in leaf:\n",
    "            subtree_nodes.add(node)\n",
    "            for ancestor in nx.ancestors(tree, node):\n",
    "                subtree_nodes.add(ancestor)\n",
    "    subtree = tree.subgraph(subtree_nodes)\n",
    "\n",
    "    # create a gv.Digraph\n",
    "    gv_g = gv.Digraph(\n",
    "        graph_attr=dict(overlap='false', concentrate='true', ranksep='2'),\n",
    "        node_attr=dict(margin='0', width='0', height='0'))\n",
    "\n",
    "    # add the leaves\n",
    "    for i, (leaf, count) in enumerate(leaves_to_count.items()):\n",
    "        name = f'cluster_{i}'\n",
    "        leaf_label = leaf_to_name(leaf) + '\\n' + f'count: {count}'\n",
    "        attr = dict(color='blue', label=leaf_label)\n",
    "        node_attr = dict(style='filled')\n",
    "        with gv_g.subgraph(name=name, graph_attr=attr, node_attr=node_attr) as c:\n",
    "            for n in leaf:\n",
    "                n_id = f'{n.level}\\n{n.name}'\n",
    "                c.node(n_id)\n",
    "                subtree_nodes.remove(n)\n",
    "\n",
    "    # add the remaining nodes\n",
    "    for n in subtree_nodes:\n",
    "        n_id = f'{n.level}\\n{n.name}'\n",
    "        gv_g.node(n_id)\n",
    "\n",
    "    # add the edges\n",
    "    for n1, n2 in subtree.edges:\n",
    "        n1_id = f'{n1.level}\\n{n1.name}'\n",
    "        n2_id = f'{n2.level}\\n{n2.name}'\n",
    "        gv_g.edge(n1_id, n2_id)\n",
    "\n",
    "    return gv_g"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "leaves_graph = draw_leaves_graph(tree, leaves_to_count=leaves_to_count)\n",
    "leaves_graph.render('megaclassifier_groups', format='svg', cleanup=True)\n",
    "leaves_graph.render('megaclassifier_groups', format='pdf', cleanup=True)\n",
    "display(leaves_graph)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
